library(ggplot2)
library(plotly)
library(htmlwidgets)
library(egg)
library(gridExtra)
library(grid)
# Source data: the suicide statistics CSV, read straight from GitHub.
df_link <- "https://github.com/vetszabolcs/suicide_data/raw/main/master.csv"
df <- read.csv(file = df_link)

# Give the first column a clean name; everything below addresses it as
# `country`.  (Presumably the header arrives mangled, e.g. by a byte-order
# mark - TODO confirm against the raw file.)
names(df)[1L] <- "country"
# Whole-period suicide rate per country (suicides per 100,000 population),
# written onto every row belonging to that country.
df$per_country <- NA
for (country_rows in split(seq_len(nrow(df)), df$country)) {
  deaths <- sum(df$suicides_no[country_rows])
  people <- sum(df$population[country_rows])
  df[country_rows, "per_country"] <- deaths / people * 100000
}
# Reference list of countries/territories treated as "developing".
# Names are kept in their long official form here and normalised below.
# Fixes relative to the previous version:
#   * the Iran entry was wrapped across two source lines, which put a literal
#     newline inside the string;
#   * "China" was listed three times (harmless after unique(), but noisy).
developing <- c(
  # Northern Africa
  "Algeria", "Egypt", "Libya", "Morocco", "Sudan", "Tunisia", "Western Sahara",
  # Eastern Africa
  "British Indian Ocean Territory", "Burundi", "Comoros", "Djibouti",
  "Eritrea", "Ethiopia", "French Southern Territories", "Kenya", "Madagascar",
  "Malawi", "Mauritius", "Mayotte", "Mozambique", "Réunion", "Rwanda",
  "Seychelles", "Somalia", "South Sudan", "Uganda",
  "United Republic of Tanzania", "Zambia", "Zimbabwe",
  # Middle Africa
  "Angola", "Cameroon", "Central African Republic", "Chad", "Congo",
  "Democratic Republic of the Congo", "Equatorial Guinea", "Gabon",
  "Sao Tome and Principe",
  # Southern Africa
  "Botswana", "Eswatini", "Lesotho", "Namibia", "South Africa",
  # Western Africa
  "Benin", "Burkina Faso", "Cabo Verde", "Côte d’Ivoire", "Gambia", "Ghana",
  "Guinea", "Guinea-Bissau", "Liberia", "Mali", "Mauritania", "Niger",
  "Nigeria", "Saint Helena", "Senegal", "Sierra Leone", "Togo",
  # Caribbean
  "Anguilla", "Antigua and Barbuda", "Aruba", "Bahamas", "Barbados",
  "Bonaire", "British Virgin Islands", "Cayman Islands", "Cuba", "Curaçao",
  "Dominica", "Dominican Republic", "Grenada", "Guadeloupe", "Haiti",
  "Jamaica", "Martinique", "Montserrat", "Puerto Rico", "Saint Barthélemy",
  "Saint Kitts and Nevis", "Saint Lucia", "Saint Martin (French Part)",
  "Saint Vincent and the Grenadines", "Sint Maarten (Dutch part)",
  "Trinidad and Tobago", "Turks and Caicos Islands",
  "United States Virgin Islands",
  # Central America
  "Belize", "Costa Rica", "El Salvador", "Guatemala", "Honduras", "Mexico",
  "Nicaragua", "Panama",
  # South America
  "Argentina", "Bolivia (Plurinational State of)", "Bouvet Island", "Brazil",
  "Chile", "Colombia", "Ecuador", "Falkland Islands (Malvinas)",
  "French Guiana", "Guyana", "Paraguay", "Peru",
  "South Georgia and the South Sandwich Islands", "Suriname", "Uruguay",
  "Venezuela (Bolivarian Republic of)",
  # Central Asia
  "Kazakhstan", "Kyrgyzstan", "Tajikistan", "Turkmenistan", "Uzbekistan",
  # Eastern Asia
  "China", "Democratic People's Republic of Korea", "Mongolia",
  "Republic of Korea",
  # South-eastern Asia
  "Brunei Darussalam", "Cambodia", "Indonesia",
  "Lao People's Democratic Republic", "Malaysia", "Myanmar", "Philippines",
  "Singapore", "Thailand", "Timor-Leste", "Viet Nam",
  # Southern Asia
  "Afghanistan", "Bangladesh", "Bhutan", "India",
  "Iran (Islamic Republic of)", "Maldives", "Nepal", "Pakistan", "Sri Lanka",
  # Western Asia
  "Armenia", "Azerbaijan", "Bahrain", "Georgia", "Iraq", "Jordan", "Kuwait",
  "Lebanon", "Oman", "Qatar", "Saudi Arabia", "State of Palestine",
  "Syrian Arab Republic", "Turkey", "United Arab Emirates", "Yemen",
  # Oceania
  "Fiji", "New Caledonia", "Papua New Guinea", "Solomon Islands", "Vanuatu",
  "Guam", "Kiribati", "Marshall Islands", "Micronesia (Federated States of)",
  "Nauru", "Northern Mariana Islands", "Palau",
  "United States Minor Outlying Islands", "American Samoa", "Cook Islands",
  "French Polynesia", "Niue", "Pitcairn", "Samoa", "Tokelau", "Tonga",
  "Tuvalu", "Wallis and Futuna Islands"
)

# Normalise: remove hyphens and any parenthesised qualifier, then trim the
# blank that the removed qualifier leaves behind ("Bolivia " -> "Bolivia") so
# the short names can actually be matched with %in%.
# NOTE(review): hyphen removal turns "Guinea-Bissau" into "GuineaBissau";
# kept as in the previous version - confirm that this is intended.
developing <- unique(trimws(gsub("-|\\(.*", "", developing)))
# Reference list of countries/territories treated as "developed".
developed <- c(
  # Northern America
  "Bermuda", "Canada", "Greenland", "Saint Pierre and Miquelon",
  "United States of America",
  # Asia
  "Japan", "Cyprus", "Israel",
  # Eastern Europe
  "Belarus", "Bulgaria", "Czech Republic", "Hungary", "Poland",
  "Republic of Moldova", "Romania", "Russian Federation", "Slovakia",
  "Ukraine",
  # Northern Europe
  "Åland Islands", "Guernsey", "Jersey", "Sark", "Denmark", "Estonia",
  "Faroe Islands", "Finland", "Iceland", "Ireland", "Isle of Man", "Latvia",
  "Lithuania", "Norway", "Svalbard and Jan Mayen Islands", "Sweden",
  "United Kingdom of Great Britain and Northern Ireland",
  # Southern Europe
  "Albania", "Andorra", "Bosnia and Herzegovina", "Croatia", "Gibraltar",
  "Greece", "Holy See", "Italy", "Malta", "Montenegro", "North Macedonia",
  "Portugal", "San Marino", "Serbia", "Slovenia", "Spain",
  # Western Europe
  "Austria", "Belgium", "France", "Germany", "Liechtenstein", "Luxembourg",
  "Monaco", "Netherlands", "Switzerland",
  # Oceania
  "Australia", "Christmas Island", "Cocos (Keeling) Islands",
  "Heard Island and McDonald Islands", "New Zealand", "Norfolk Island"
)

# Normalise: remove hyphens and parenthesised qualifiers, then trim the blank
# the removed qualifier leaves behind ("Cocos " -> "Cocos").
developed <- unique(trimws(gsub("-|\\(.*", "", developed)))

# Shorten the two long official names to the short forms used elsewhere in
# this script (e.g. "United Kingdom" in the `north` vector).  Matching on the
# exact name replaces the earlier positional `[1]` / `[2]` replacement on a
# grep() result, which silently depended on the order of the list.
developed[developed == "United States of America"] <- "United States"
developed[
  developed == "United Kingdom of Great Britain and Northern Ireland"
] <- "United Kingdom"
# European country groupings used for the Europe-only plots.

# Countries labelled "Postsocialist" further down.
post_soc <- c(
  "Czech Republic", "Estonia", "Lithuania", "Poland", "Slovakia", "Hungary",
  "Croatia", "Latvia", "Romania", "Russian Federation", "Bulgaria", "Serbia",
  "Ukraine"
)

# Geographic regions.
west <- c(
  "Austria", "Belgium", "France", "Germany", "Liechtenstein", "Luxembourg",
  "Monaco", "Netherlands", "Switzerland"
)
east <- c(
  "Belarus", "Bulgaria", "Czech Republic", "Hungary", "Poland",
  "Republic of Moldova", "Romania", "Russian Federation", "Slovakia",
  "Ukraine"
)
north <- c(
  "Denmark", "Estonia", "Faroe Islands", "Finland", "Iceland", "Ireland",
  "Isle of Man", "Latvia", "Lithuania", "Norway",
  "Svalbard and Jan Mayen Islands", "Sweden", "United Kingdom"
)
south <- c(
  "Albania", "Andorra", "Bosnia and Herzegovina", "Croatia", "Gibraltar",
  "Greece", "Holy See", "Italy", "Malta", "Montenegro", "North Macedonia",
  "Portugal", "San Marino", "Serbia", "Slovenia", "Spain"
)
# Flag every row as "Developed" / "Developing" (NA when the country is in
# neither reference list).
#
# The reference lists are first aligned with the spellings used in the data.
# The earlier version ran a numeric 1/0 pass *before* these fixes and then
# overwrote it with the labels; that pass was redundant and could leave stray
# "1"/"0" strings next to the labels for any row matched only by the
# pre-fix spelling.  A single labelling pass avoids both problems.
developed[grep("Czech", developed)] <- "Czech Republic"
developing[grep("Vinc", developing)] <- "Saint Vincent and Grenadines"

# Macau is excluded from the rest of the analysis (reason not stated in this
# script).
df <- df[!df$country == "Macau", ]

df$developed <- NA_character_
df$developed[df$country %in% developed] <- "Developed"
df$developed[df$country %in% developing] <- "Developing"
# European subset: every row whose country appears in any of the regional
# vectors or in the post-socialist list.
european_countries <- unique(c(west, east, south, north, post_soc))
eur <- df[df$country %in% european_countries, ]

# Political-history label.
is_post_soc <- eur$country %in% post_soc
eur$post_soc <- "Non-postsocialist"
eur$post_soc[is_post_soc] <- "Postsocialist"

# Geographic region label; applied in the order East, West, North, South, so
# a later region would win if a country were ever listed twice.
region_members <- list(East = east, West = west, North = north, South = south)
eur$region <- NA
for (region_name in names(region_members)) {
  eur$region[eur$country %in% region_members[[region_name]]] <- region_name
}
#' Add a pooled rate per (by, by2) group to a data frame
#'
#' For every combination of values in the `by` and `by2` columns, sums
#' `num_col` and `den_col` over the rows of that combination and writes
#' `sum(num_col) / sum(den_col) * per` into `new_col` on each of those rows.
#' Rows where either key is `NA` are left untouched (`NA` in a newly created
#' column), which matches the earlier loop-based version.
#'
#' The groups are computed in one vectorised pass with `ave()` instead of
#' rescanning the whole frame for every key pair, and the sums are accumulated
#' as doubles so large population totals cannot hit integer limits.
#'
#' @param data_f  A data frame.
#' @param by,by2  Names of the two grouping columns.
#' @param new_col Name of the column that receives the rate.
#' @param num_col Name of the numerator column (default "suicides_no").
#' @param den_col Name of the denominator column (default "population").
#' @param per     Scaling factor applied to the ratio (default 100000).
#' @return `data_f` with `new_col` added or updated.  If no row has both keys
#'   non-missing, `data_f` is returned unchanged.
aggreGate <- function(data_f, by = "country", by2 = "year", new_col,
                      num_col = "suicides_no", den_col = "population",
                      per = 100000) {
  new_df <- data_f

  absent <- setdiff(c(by, by2, num_col, den_col), names(new_df))
  if (length(absent) > 0) {
    stop("aggreGate(): column(s) not found: ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  key1 <- new_df[[by]]
  key2 <- new_df[[by2]]
  usable <- !is.na(key1) & !is.na(key2)
  if (!any(usable)) {
    return(new_df)
  }

  # Integer group codes keep ave()'s internal interaction() labels
  # unambiguous regardless of what characters the key values contain.
  code1 <- match(key1[usable], unique(key1[usable]))
  code2 <- match(key2[usable], unique(key2[usable]))

  numerator <- ave(as.numeric(new_df[[num_col]][usable]),
                   code1, code2, FUN = sum)
  denominator <- ave(as.numeric(new_df[[den_col]][usable]),
                     code1, code2, FUN = sum)

  new_df[usable, new_col] <- numerator / denominator * per
  new_df
}
# Overall suicide rate per calendar year (suicides per 100,000 population,
# pooled over all countries), written onto every row of that year.
df$yearly <- NA
for (year_rows in split(seq_len(nrow(df)), df$year)) {
  deaths <- sum(df$suicides_no[year_rows])
  people <- sum(df$population[year_rows])
  df[year_rows, "yearly"] <- deaths / people * 100000
}
# Line chart of the pooled yearly rate; the line colour follows the negated
# rate and the legend is hidden.
gg0 <- ggplot(data = df,
              mapping = aes(x = year, y = yearly, colour = yearly * -1)) +
  geom_line(size = 1.5) +
  scale_x_continuous(breaks = seq(from = 1985, to = 2016, by = 6)) +
  labs(title = "Yearly suicide", x = "Year", y = "Suicide / 100k people") +
  theme(
    legend.position = "none",
    axis.title.x = element_text(hjust = 0.515, margin = margin(5, 0, 0, 0)),
    axis.title = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    axis.text = element_text(size = 10)
  )
gg0
# Shared text styling that the later plots add on top of the default theme:
# bold axis titles and a bold, centred plot title.
my_theme <- theme(
  axis.title.x = element_text(hjust = 0.52, margin = margin(5, 0, 0, 0)),
  axis.title = element_text(face = "bold", size = 14),
  plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
  axis.text = element_text(size = 10)
)
# Yearly rate within each development group, drawn as one line per group.
# (Title typo "countires" corrected to "countries".)
df <- aggreGate(df, "year", "developed", "dev_yearly")
gg1 <- ggplot(df, aes(year, dev_yearly,
                      group = developed, colour = developed)) +
  geom_line(size = 1.5) +
  labs(
    title = "Yearly suicide in developed vs. developing countries",
    x = "Year", y = "Suicide / 100k people"
  ) +
  scale_x_continuous(breaks = seq(1985, 2016, 6)) +
  theme(legend.title = element_blank()) +
  my_theme +
  theme(legend.text = element_text(size = 10))
gg1
# Distribution of the group-by-year rates per development group: jittered,
# mostly transparent points with a box plot drawn on top.
# (Title typo "countires" corrected to "countries".)
gg2 <- ggplot(df, aes(developed, dev_yearly)) +
  geom_point(
    alpha = 0.05,
    position = position_jitter(height = .05, width = .38),
    colour = "red"
  ) +
  geom_boxplot(aes(colour = dev_yearly), alpha = 0.5, outlier.colour = NA) +
  labs(
    title = "Yearly suicide in developed vs. developing countries",
    x = "Country type", y = "Suicide / 100k people"
  ) +
  my_theme
gg2
# Recompute the whole-period rate per country on the current `df`.
df$per_country <- NA
for (country_rows in split(seq_len(nrow(df)), df$country)) {
  deaths <- sum(df$suicides_no[country_rows])
  people <- sum(df$population[country_rows])
  df[country_rows, "per_country"] <- deaths / people * 100000
}

# Rate per country and year.
df <- aggreGate(df, by = "country", by2 = "year", new_col = "country_yearly")

# Sort rows from the highest to the lowest whole-period rate and freeze that
# order in the factor levels, so head(unique(df$country), n) yields the top n.
df <- df[order(df$per_country, decreasing = TRUE), ]
df$country <- factor(df$country, levels = unique(df$country))
# Yearly rate for the ten countries that come first in the factor order of
# df$country, plus an interactive plotly version of the same chart.
# The tooltip label "Country:" now carries the same trailing space as the
# "Year: " and "Suicide/100k: " labels (it used to render as "Country:Name").
top10_countries <- head(unique(df$country), 10)
top10 <- ggplot(
  df[df$country %in% top10_countries, ],
  aes(
    x = year, y = country_yearly,
    colour = country, group = country,
    text = paste0(
      "Country: ", country, "\n",
      "Year: ", year, "\n",
      "Suicide/100k: ", round(country_yearly)
    )
  )
) +
  geom_line(size = 1.1) +
  labs(
    title = "Yearly suicide in top 10 countries",
    x = "Year", y = "Suicide / 100k people",
    color = "Country"
  ) +
  theme(legend.title = element_text(face = "bold", size = 16)) +
  theme(legend.text = element_text(size = 14)) +
  theme(axis.title.x = element_text(hjust = 0.515)) +
  scale_x_continuous(breaks = seq(1985, 2016, 6)) +
  my_theme +
  geom_point(aes(color = country), cex = 1.5)

# Interactive version: legend title removed, tooltip taken from `text`.
ggplotly(top10 + theme(legend.title = element_blank()), tooltip = "text") %>%
  layout(margin = list(t = 75))
# Same chart restricted to Europe: rate per country and year, rows sorted by
# the whole-period rate, top ten countries plotted.
# Text fixes: "Country:" in the tooltip gets the trailing space the other
# labels already have, and "European" is capitalised in the title.
eur <- aggreGate(eur, "country", "year", "country_yearly")
eur <- eur[order(eur$per_country, decreasing = TRUE), ]
eur$country <- factor(eur$country, levels = unique(eur$country))

top10_eur_countries <- head(unique(eur$country), 10)
top10_eur <- ggplot(
  eur[eur$country %in% top10_eur_countries, ],
  aes(
    x = year, y = country_yearly,
    colour = country, group = country,
    text = paste0(
      "Country: ", country, "\n",
      "Year: ", year, "\n",
      "Suicide/100k: ", round(country_yearly)
    )
  )
) +
  geom_line(size = 1.1) +
  labs(
    title = "Yearly suicide in top 10 European countries",
    x = "Year", y = "Suicide / 100k people",
    color = "Country"
  ) +
  theme(legend.title = element_text(face = "bold", size = 16)) +
  theme(legend.text = element_text(size = 12)) +
  theme(axis.title.x = element_text(hjust = 0.515)) +
  scale_x_continuous(breaks = seq(1985, 2016, 6)) +
  my_theme +
  geom_point(aes(color = country), cex = 1.5)

# Interactive version: legend title removed, tooltip taken from `text`.
ggplotly(top10_eur + theme(legend.title = element_blank()),
         tooltip = "text") %>%
  layout(margin = list(t = 75))
# Rate per European region and age band, shown as dodged bars.
eur <- aggreGate(eur, by = "region", by2 = "age", new_col = "per_region_age")

# Fix the order of the age bands from youngest to oldest.
age_bands <- c(
  "5-14 years", "15-24 years", "25-34 years",
  "35-54 years", "55-74 years", "75+ years"
)
eur$age <- factor(eur$age, levels = age_bands)

gg5 <- ggplot(data = eur,
              mapping = aes(x = region, y = per_region_age, fill = age)) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle("European regions and age") +
  xlab("Region") +
  ylab("Suicide / 100k people") +
  labs(fill = "Age") +
  theme(
    text = element_text(size = 14),
    legend.title = element_text(face = "bold", size = 12)
  ) +
  my_theme +
  scale_fill_hue(l = 60, c = 50)
gg5
# Rate per European region and sex, shown as dodged bars.
eur <- aggreGate(eur, by = "region", by2 = "sex", new_col = "per_region_sex")

# Capitalise the sex labels for display in the legend.
eur$sex[eur$sex == "female"] <- "Female"
eur$sex[eur$sex == "male"] <- "Male"

gg6 <- ggplot(data = eur,
              mapping = aes(x = region, y = per_region_sex, fill = sex)) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle("European regions and sex") +
  xlab("Region") +
  ylab("Suicide / 100k people") +
  labs(fill = "Sex") +
  theme(
    text = element_text(size = 14),
    legend.title = element_text(face = "bold", size = 12)
  ) +
  my_theme +
  scale_fill_hue(l = 60, c = 50)
gg6
# Animated choropleth of the yearly rate per country, one frame per year,
# drawn on a map of Europe.
#
# Changes relative to the previous version:
#   * `locationmode` was "countries", which is not one of plotly's location
#     modes; the locations passed are ISO-3 codes, so the mode is now stated
#     explicitly as "ISO-3";
#   * the year filter and the mean used for the colour limits are computed
#     once instead of three times;
#   * "European" is capitalised in the title.

# ISO-3 code for every country name.
df$c_code <- countrycode::countryname(df$country, "iso3c")

# Rate per country and year (aggreGate defaults), rounded for display.
df <- aggreGate(df, new_col = "year_country")
df$year_country <- round(df$year_country, 1)
df$hover <- paste(df$country, paste0(df$year_country, " /100k"), sep = "\n")
names(df)[names(df) == "year_country"] <- "Suicide"

# Hover label styling.
label <- list(
  bgcolor = "black",
  bordercolor = "transparent",
  font = list(size = 15, color = "white")
)

# Years shown in the animation, and the mean rate over those rows; the colour
# scale is limited to [mean / 3, mean * 3].
in_map_years <- df$year >= 1989 & df$year < 2016
map_mean <- mean(df$Suicide[in_map_years])

plot_geo(df[in_map_years, ],
         locationmode = "ISO-3",
         frame = ~year,
         width = 800, height = 800) %>%
  add_trace(
    locations = ~c_code,
    z = ~Suicide,
    zmin = map_mean / 3,
    zmax = map_mean * 3,
    color = ~Suicide,
    colorscale = "RdBu",
    text = ~hover,
    hoverinfo = "text"
  ) %>%
  layout(
    geo = list(scope = "europe"),
    title = list(
      text = "<b>Yearly suicides in European countries</b>",
      y = 0.95, font = list(size = 20)
    ),
    margin = list(t = 200)
  ) %>%
  style(hoverlabel = label) %>%
  colorbar(y = 0.85, ticksuffix = "/100k")
# Combine six of the plots into a 3 x 2 grid, save it as facet.png and embed
# the saved image.
#
# Fix: the fourth panel referred to `gg4`, which is not defined anywhere in
# this script, so the call stopped with "object 'gg4' not found".  By its
# position in the gg0 ... gg6 numbering and next to `top10` in the grid, the
# intended plot is presumably `top10_eur` - NOTE(review): confirm.
#
# `facet` receives the value of the whole pipeline, i.e. whatever ggsave()
# returns, not the arranged plot itself.
facet <- arrangeGrob(
  gg0 + theme(axis.title.x = element_blank()),
  gg5 + theme(legend.position = "right", legend.title = element_blank()),
  top10 + theme(axis.title.x = element_blank()) +
    theme(legend.position = "bottom"),
  top10_eur + theme(legend.position = "right"),
  gg1 + theme(axis.title.x = element_blank()) +
    theme(legend.position = "bottom"),
  gg6 + theme(legend.position = "right", legend.title = element_blank()),
  nrow = 3, ncol = 2,
  top = textGrob("Suicide rates from 1987 to 2016\n",
                 gp = gpar(fontsize = 20, fontface = "bold"))
) %>%
  ggplotify::as.ggplot() %>%
  ggsave("facet.png", plot = ., device = "png",
         width = 25, height = 15, dpi = 150)
knitr::include_graphics("facet.png")